{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 138,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd             # data package\n",
    "import matplotlib.pyplot as plt # graphics \n",
    "import datetime as dt\n",
    "import numpy as np\n",
    "\n",
    "import requests, io             # internet and input tools  \n",
    "import zipfile as zf            # zip file tools \n",
    "import os  \n",
    "\n",
    "#import weightedcalcs as wc\n",
    "#import numpy as np\n",
    "\n",
    "import pyarrow as pa\n",
    "import pyarrow.parquet as pq"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### An Alternative Concordance Mapping\n",
    "\n",
    "This notebook constructs a better concordence to go from hs6 codes to naics codes. The issue (as raised to me by another research) is that while the hs10 to naics concordence is one to one (an hs10 code mapped into one naics); if you simply truncate down to the hs6 then the concordence becomes one to many (for one hs6 code there are multiple naics codes).\n",
    "\n",
    "In the main body of ``countylevel_tariffs_and_exports`` this was ignored and the rules dictionaries were applies to create the mapping (so first an hs6 code would be mapped into a naics code, if in the creation of the dictionary it saw the same h6 code going to a different naics code, it simply overwrites the old one). \n",
    "\n",
    "The code below provides an altnerative solution. What it does is that if first merges the hs10 export data with the census concordance. Then the rule is: for one hs6 code, if there are multiple naics codes, take the one with the largest amount of trade associated with it. \n",
    "\n",
    "Utimatly this does not seem to matter at all for my results, but it is a detail to get right and be minfull of. "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### First Read in the HS10 Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 139,
   "metadata": {},
   "outputs": [],
   "source": [
    "my_key = \"&key=34e40301bda77077e24c859c6c6c0b721ad73fc7\"\n",
    "# This is my key. I'm nice and I have it posted. If you will be doing more with this\n",
    "# please get your own key!\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 140,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>E_COMMODITY</th>\n",
       "      <th>ALL_VAL_MO</th>\n",
       "      <th>time</th>\n",
       "      <th>COMM_LVL</th>\n",
       "      <th>total_trade</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>0206490010</td>\n",
       "      <td>6235559</td>\n",
       "      <td>2018-05-01</td>\n",
       "      <td>HS10</td>\n",
       "      <td>6235559.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>6212200010</td>\n",
       "      <td>265561</td>\n",
       "      <td>2018-03-01</td>\n",
       "      <td>HS10</td>\n",
       "      <td>265561.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>7213200000</td>\n",
       "      <td>2954764</td>\n",
       "      <td>2018-03-01</td>\n",
       "      <td>HS10</td>\n",
       "      <td>2954764.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>9114300000</td>\n",
       "      <td>22462</td>\n",
       "      <td>2018-10-01</td>\n",
       "      <td>HS10</td>\n",
       "      <td>22462.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>2844302010</td>\n",
       "      <td>0</td>\n",
       "      <td>2018-09-01</td>\n",
       "      <td>HS10</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>5</td>\n",
       "      <td>0206490010</td>\n",
       "      <td>4501786</td>\n",
       "      <td>2017-01-01</td>\n",
       "      <td>HS10</td>\n",
       "      <td>4501786.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>6</td>\n",
       "      <td>2844302010</td>\n",
       "      <td>232488</td>\n",
       "      <td>2017-01-01</td>\n",
       "      <td>HS10</td>\n",
       "      <td>232488.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>7</td>\n",
       "      <td>2844302010</td>\n",
       "      <td>0</td>\n",
       "      <td>2018-08-01</td>\n",
       "      <td>HS10</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>8</td>\n",
       "      <td>6212200010</td>\n",
       "      <td>52286</td>\n",
       "      <td>2018-08-01</td>\n",
       "      <td>HS10</td>\n",
       "      <td>52286.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>9</td>\n",
       "      <td>0206490010</td>\n",
       "      <td>3251276</td>\n",
       "      <td>2019-03-01</td>\n",
       "      <td>HS10</td>\n",
       "      <td>3251276.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  E_COMMODITY ALL_VAL_MO       time COMM_LVL  total_trade\n",
       "0  0206490010    6235559 2018-05-01     HS10    6235559.0\n",
       "1  6212200010     265561 2018-03-01     HS10     265561.0\n",
       "2  7213200000    2954764 2018-03-01     HS10    2954764.0\n",
       "3  9114300000      22462 2018-10-01     HS10      22462.0\n",
       "4  2844302010          0 2018-09-01     HS10          0.0\n",
       "5  0206490010    4501786 2017-01-01     HS10    4501786.0\n",
       "6  2844302010     232488 2017-01-01     HS10     232488.0\n",
       "7  2844302010          0 2018-08-01     HS10          0.0\n",
       "8  6212200010      52286 2018-08-01     HS10      52286.0\n",
       "9  0206490010    3251276 2019-03-01     HS10    3251276.0"
      ]
     },
     "execution_count": 140,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "end_use = \"hs?get=E_COMMODITY,ALL_VAL_MO\"\n",
    "\n",
    "url = \"https://api.census.gov/data/timeseries/intltrade/exports/\"\n",
    "url = url + end_use + my_key + \"&time==from+2017-01\" + \"&COMM_LVL=HS10\"\n",
    "# note the change in the commodity level\n",
    "\n",
    "r = requests.get(url) \n",
    "\n",
    "df_all_trade = pd.DataFrame(r.json()[1:]) # This then converts it to a dataframe\n",
    "# Note that the first entry is the labels\n",
    "\n",
    "df_all_trade.columns = r.json()[0]\n",
    "\n",
    "df_all_trade.time = pd.to_datetime(df_all_trade.time, format=\"%Y-%m\")\n",
    "# This is so I can call this correctly...\n",
    "\n",
    "df_all_trade[\"total_trade\"] = df_all_trade.ALL_VAL_MO.astype(float)\n",
    "\n",
    "df_all_trade.E_COMMODITY = df_all_trade.E_COMMODITY.astype(str)\n",
    "\n",
    "df_all_trade.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 141,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_all_trade.set_index(\"time\", inplace = True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 142,
   "metadata": {},
   "outputs": [],
   "source": [
    "dftrade_17 = df_all_trade.loc[\"2017\"].groupby(\"E_COMMODITY\").agg({\"total_trade\":\"sum\"})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 143,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>total_trade</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>E_COMMODITY</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0101210000</td>\n",
       "      <td>260550827.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>0101290000</td>\n",
       "      <td>188067222.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>0101300000</td>\n",
       "      <td>3274174.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>0101900000</td>\n",
       "      <td>1814439.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>0102210010</td>\n",
       "      <td>4038807.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             total_trade\n",
       "E_COMMODITY             \n",
       "0101210000   260550827.0\n",
       "0101290000   188067222.0\n",
       "0101300000     3274174.0\n",
       "0101900000     1814439.0\n",
       "0102210010     4038807.0"
      ]
     },
     "execution_count": 143,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dftrade_17.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 144,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(9151, 1)"
      ]
     },
     "execution_count": 144,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dftrade_17.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Then Read in the Census Concordance"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 145,
   "metadata": {},
   "outputs": [],
   "source": [
    "url = \"https://www.census.gov/foreign-trade/reference/codes/concordance/expconcord17.xls\"\n",
    "\n",
    "df_concordance = pd.read_excel(url, dtype = {\"commodity\": str, \"naics\": str})\n",
    "\n",
    "df_concordance[\"hs8\"] = df_concordance.commodity.str[0:8]\n",
    "# truncate down to get the hs8\n",
    "\n",
    "df_concordance[\"hs6\"] = df_concordance.commodity.str[0:6]\n",
    "# truncate down to get the hs6\n",
    "\n",
    "#df_concordance[\"naics3\"] = df_concordance[\"naics\"].str[0:3]\n",
    "\n",
    "#dict_concordance = dict(zip(df_concordance.hs6,df_concordance.naics)) \n",
    "\n",
    "# This creates a dictionaty from which we can map the hs6 to the naics codes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 146,
   "metadata": {},
   "outputs": [],
   "source": [
    "#df_concordance[[\"hs6\",\"naics3\"]].head()\n",
    "\n",
    "#first = df_concordance.groupby(\"hs6\")[\"naics\"].count().max()\n",
    "\n",
    "#second = df_concordance.groupby(\"naics\")[\"hs6\"].count().max()\n",
    "\n",
    "#test = df_concordance.groupby(\"hs6\")[\"naics\"].count()\n",
    "\n",
    "#test.sum()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Then Merge the two\n",
    "\n",
    "I'm going to do a right merge to perseve the structure of the original census concordance. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 147,
   "metadata": {},
   "outputs": [],
   "source": [
    "trade_conc = dftrade_17.merge(df_concordance[[\"hs6\",\"naics\",\"commodity\"]],\n",
    "                                left_index = True, right_on = \"commodity\", how = \"right\", indicator = True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 148,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>total_trade</th>\n",
       "      <th>hs6</th>\n",
       "      <th>naics</th>\n",
       "      <th>commodity</th>\n",
       "      <th>_merge</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>260550827.0</td>\n",
       "      <td>010121</td>\n",
       "      <td>112920</td>\n",
       "      <td>0101210000</td>\n",
       "      <td>both</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>188067222.0</td>\n",
       "      <td>010129</td>\n",
       "      <td>112920</td>\n",
       "      <td>0101290000</td>\n",
       "      <td>both</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>3274174.0</td>\n",
       "      <td>010130</td>\n",
       "      <td>112920</td>\n",
       "      <td>0101300000</td>\n",
       "      <td>both</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>1814439.0</td>\n",
       "      <td>010190</td>\n",
       "      <td>112920</td>\n",
       "      <td>0101900000</td>\n",
       "      <td>both</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>4038807.0</td>\n",
       "      <td>010221</td>\n",
       "      <td>11211X</td>\n",
       "      <td>0102210010</td>\n",
       "      <td>both</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   total_trade     hs6   naics   commodity _merge\n",
       "0  260550827.0  010121  112920  0101210000   both\n",
       "1  188067222.0  010129  112920  0101290000   both\n",
       "2    3274174.0  010130  112920  0101300000   both\n",
       "3    1814439.0  010190  112920  0101900000   both\n",
       "4    4038807.0  010221  11211X  0102210010   both"
      ]
     },
     "execution_count": 148,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "trade_conc.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Then Groupby and Assign Naics Codes\n",
    "\n",
    "I will group by hs6, then for each hs6 code there may be multiple naics codes. In these cases, idea is to assign the naics code to the hs6 code based on which one has the most trade."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 149,
   "metadata": {},
   "outputs": [],
   "source": [
    "def concordance_group(df):\n",
    "    \n",
    "    #print(df.index)\n",
    "    \n",
    "    num_naics = df.shape[0]\n",
    "    # This tells me how many naics codes are there\n",
    "    \n",
    "    if num_naics == 1:\n",
    "        # if only one (like most), just grab the naics code\n",
    "        \n",
    "        max_naics = df.naics.iloc[0]\n",
    "        \n",
    "    else:\n",
    "        # if many, then grab the max trade flows...\n",
    "        \n",
    "        max_naics = df.loc[df.total_trade.idxmax()].naics\n",
    "    \n",
    "    foo = {\"num_naics\": [num_naics],\n",
    "          \"naics\": [max_naics]}\n",
    "    \n",
    "    return pd.DataFrame(foo)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 150,
   "metadata": {},
   "outputs": [],
   "source": [
    "grp = trade_conc.groupby(\"hs6\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 158,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>total_trade</th>\n",
       "      <th>hs6</th>\n",
       "      <th>naics</th>\n",
       "      <th>commodity</th>\n",
       "      <th>_merge</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>148</td>\n",
       "      <td>2373449.0</td>\n",
       "      <td>030211</td>\n",
       "      <td>112511</td>\n",
       "      <td>0302110010</td>\n",
       "      <td>both</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>149</td>\n",
       "      <td>516374.0</td>\n",
       "      <td>030211</td>\n",
       "      <td>114111</td>\n",
       "      <td>0302110090</td>\n",
       "      <td>both</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     total_trade     hs6   naics   commodity _merge\n",
       "148    2373449.0  030211  112511  0302110010   both\n",
       "149     516374.0  030211  114111  0302110090   both"
      ]
     },
     "execution_count": 158,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "grp.get_group(\"030211\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This provides a good example of the issue. So two hs10 codes (0302110010, 0302110090) and two different naics codes (114111, 112511), but the same hs6 code 030211. So how do we go from hs6 to naics. I'll do it by taking the one with the most trade. So 030211 will go to 112511. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 157,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>num_naics</th>\n",
       "      <th>naics</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>112511</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   num_naics   naics\n",
       "0          2  112511"
      ]
     },
     "execution_count": 157,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "concordance_group(grp.get_group(\"030211\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 152,
   "metadata": {},
   "outputs": [],
   "source": [
    "mc = grp.apply(concordance_group)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 153,
   "metadata": {},
   "outputs": [],
   "source": [
    "mc.reset_index(inplace = True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 154,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>hs6</th>\n",
       "      <th>level_1</th>\n",
       "      <th>num_naics</th>\n",
       "      <th>naics</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>010121</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>112920</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>010129</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>112920</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>010130</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>112920</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>010190</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>112920</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>010221</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>11211X</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      hs6  level_1  num_naics   naics\n",
       "0  010121        0          1  112920\n",
       "1  010129        0          1  112920\n",
       "2  010130        0          1  112920\n",
       "3  010190        0          1  112920\n",
       "4  010221        0          4  11211X"
      ]
     },
     "execution_count": 154,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mc.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 155,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "5376"
      ]
     },
     "execution_count": 155,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mc.groupby(\"hs6\")[\"naics\"].count().sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 156,
   "metadata": {},
   "outputs": [],
   "source": [
    "file_path = os.getcwd() + \"\\\\data\"+ \"\\\\alt_concordance.parquet\"\n",
    "\n",
    "pq.write_table(pa.Table.from_pandas(mc), file_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
